Libraries

library(tidyverse)
library(lubridate)
library(imager)
library(stringi)
library(reshape)

Import

# Load the labelled training data. `skip = 1` discards the first line of the
# file, so the line after it is the one read as the header row.
train <- readr::read_csv("Training_data.csv", skip = 1)

Clean

# Replace the imported header with short, consistent column names
names(train) <- c(
  "file", "time", "weather", "smoke", "pelicans", "pods", "npods",
  "disturbance", "stageofnesting", "preds", "abandon", "pretty"
)

# Drop the last character of every file name (a trailing apostrophe)
train$file <- str_sub(train$file, end = -2L)

# Turn every column except `file` (1) and `npods` (7) into a factor
train[, -c(1, 7)] <- lapply(train[, -c(1, 7)], factor)

# Collapse the first three weather levels into "cloud".
# NOTE(review): this relabels levels by position, so it assumes `weather`
# has exactly these five levels in this order -- confirm against the raw data.
levels(train$weather) <- c(rep("cloud", 3), "rain", "sun")

# TODO: make NAs meaningful (not implemented yet)

# Derive a timestamp for time-series work: strip the 4-character extension
# from the file name and parse the remainder as year-month-day h:m:s
train$date <- ymd_hms(str_sub(train$file, end = -5L))

# Inspect the result so far
glimpse(train)
Observations: 1,011
Variables: 13
$ file           <chr> "20170309002000.jpg", "20170309014000.jpg", "20170309091500.jpg", "20170309162000.j...
$ time           <fct> day, day, night, day, day, night, day, night, day, day, night, night, day, day, day...
$ weather        <fct> sun, NA, NA, sun, sun, NA, cloud, NA, sun, cloud, NA, NA, sun, sun, sun, NA, NA, NA...
$ smoke          <fct> FALSE, NA, NA, FALSE, FALSE, NA, FALSE, NA, FALSE, FALSE, NA, NA, FALSE, FALSE, FAL...
$ pelicans       <fct> FALSE, NA, NA, FALSE, FALSE, NA, FALSE, NA, TRUE, TRUE, NA, NA, TRUE, TRUE, FALSE, ...
$ pods           <fct> FALSE, NA, NA, FALSE, FALSE, NA, FALSE, NA, TRUE, TRUE, NA, NA, TRUE, TRUE, FALSE, ...
$ npods          <int> NA, NA, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA,...
$ disturbance    <fct> TRUE, NA, NA, FALSE, FALSE, NA, FALSE, NA, FALSE, FALSE, NA, NA, FALSE, FALSE, FALS...
$ stageofnesting <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ preds          <fct> FALSE, NA, NA, FALSE, FALSE, NA, FALSE, NA, FALSE, FALSE, NA, NA, FALSE, FALSE, FAL...
$ abandon        <fct> NA, NA, NA, NA, NA, NA, NA, NA, FALSE, FALSE, NA, NA, FALSE, FALSE, NA, NA, NA, NA,...
$ pretty         <fct> NA, NA, NA, FALSE, FALSE, NA, FALSE, NA, FALSE, FALSE, NA, NA, FALSE, FALSE, FALSE,...
$ date           <dttm> 2017-03-09 00:20:00, 2017-03-09 01:40:00, 2017-03-09 09:15:00, 2017-03-09 16:20:00...
# Keep only the informative columns; `stageofnesting`, `preds` and `abandon`
# (positions 9-11) are dropped.
train <- train[, c(
  "file", "time", "weather", "smoke", "pelicans", "pods", "npods",
  "disturbance", "pretty", "date"
)]

Output

# Persist the cleaned training table next to the notebook
readr::write_csv(train, "train_clean.csv")

EDA

# Pod counts over the season: raw observations plus a smoothed trend line.
# The y axis is limited to 0-10 and the x axis is labelled once per month.
ggplot(data = train, mapping = aes(x = date, y = npods)) +
  geom_point(alpha = 0.25) +
  geom_smooth(se = FALSE) +
  scale_y_continuous(limits = c(0, 10)) +
  scale_x_datetime(date_breaks = "1 month", date_labels = "%b") +
  ggtitle("Number of Pelican Pods: 2017") +
  xlab("Date") +
  ylab("Number of Pods")

View Pretty Photos

# Display every photo that was labelled as "pretty".
# subset() keeps only rows where the condition is TRUE, so rows with a
# missing `pretty` label are excluded.
pretty <- subset(train, train$pretty == TRUE)

# Build the image paths in one vectorised step. paste0() inserts no
# separator, so there are no stray spaces to strip afterwards.
pimg <- paste0("PeliPhotos1Folder/", pretty$file)

# Load however many photos were selected rather than assuming exactly 14,
# so the chunk keeps working when the labels change.
imlist <- as.imlist(lapply(pimg, load.image))

# Plot each image on its own; seq_along() is safe when nothing was selected.
for (i in seq_along(imlist)) {
  plot(imlist[i])
}

Weather

# Weather category recorded for each photo, plotted against time
ggplot(data = train, mapping = aes(x = date, y = weather)) +
  geom_point()

Smoke

# Collect the photos in which smoke was recorded.
# subset() keeps only rows where the condition is TRUE, so rows with a
# missing `smoke` label are excluded.
smoke <- subset(train, train$smoke == TRUE)

# Build the image paths in one vectorised step. paste0() inserts no
# separator, so there are no stray spaces to strip afterwards.
simg <- paste0("PeliPhotos1Folder/", smoke$file)

# Load every matching photo instead of assuming there are exactly two.
# NOTE(review): the rendered output of this chunk reports
# "Premature end of JPEG file", so one of these images appears to be
# truncated -- verify the source photos.
imlist <- as.imlist(lapply(simg, load.image))
JPEG decompression: Premature end of JPEG file
# Plot each loaded image on its own. Iterating over seq_along() follows the
# actual number of images instead of a hard-coded count.
for (i in seq_along(imlist)) {
  plot(imlist[i])
}

Disturbance

Notes

The abandon, stageofnesting and preds columns contain only FALSE or NA values, so they carry no information and are dropped during cleaning.

LS0tCnRpdGxlOiAiQ2xlYW5pbmcgVHJhaW5pbmciCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMjIyBMaWJyYXJpZXMKCmBgYHtyLCBtZXNzYWdlID0gRkFMU0UsIHdhcm5pbmcgPSBGQUxTRX0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkobHVicmlkYXRlKQpsaWJyYXJ5KGltYWdlcikKbGlicmFyeShzdHJpbmdpKQpsaWJyYXJ5KHJlc2hhcGUpCmBgYAoKIyMjIEltcG9ydAoKYGBge3IsIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9CnRyYWluID0gcmVhZF9jc3YoIlRyYWluaW5nX2RhdGEuY3N2Iiwgc2tpcCA9IDEpCmBgYAoKIyMjIENsZWFuCgpgYGB7cn0KIyBGaXggdmFyaWFibGUgbmFtZXMKbmFtZXModHJhaW4pID0gYygiZmlsZSIsICJ0aW1lIiwgIndlYXRoZXIiLCAic21va2UiLCAicGVsaWNhbnMiLCAicG9kcyIsICJucG9kcyIsICJkaXN0dXJiYW5jZSIsICJzdGFnZW9mbmVzdGluZyIsICJwcmVkcyIsICJhYmFuZG9uIiwgInByZXR0eSIpCgojIFJlbW92ZSBhcHBvc3Ryb3BoZSBmcm9tIGZpbGUKdHJhaW4kZmlsZSA9IHN1YnN0cih0cmFpbiRmaWxlLDEsbmNoYXIodHJhaW4kZmlsZSktMSkKCiMgRmFjdG9yIGFuZCBjbGVhbiBmYWN0b3JzCnRyYWluWyxjKDI6Niw4OjEyKV0gPC0gbGFwcGx5KHRyYWluWyxjKDI6Niw4OjEyKV0sIGZhY3RvcikKbGV2ZWxzKHRyYWluJHdlYXRoZXIpID0gYygiY2xvdWQiLCAiY2xvdWQiLCAiY2xvdWQiLCAicmFpbiIsICJzdW4iKQoKIyBNYWtlIE5BcyBtZWFuaW5nZnVsCgoKIyBDcmVhdGUgYSBkYXRlIHZhcmlhYmxlIGZvciB0aW1lIHNlcmllcwp0cmFpbiRkYXRlID0gc3Vic3RyKHRyYWluJGZpbGUsMSxuY2hhcih0cmFpbiRmaWxlKS00KQp0cmFpbiRkYXRlID0geW1kX2htcyh0cmFpbiRkYXRlKQoKIyBMb29rIGF0IHByb2dyZXNzCmdsaW1wc2UodHJhaW4pCgojIFJlbW92ZSB1bmluZm9ybWF0aXZlIHZhcmlhYmxlcwp0cmFpbiA9IHRyYWluWyxjKDE6OCwxMiwxMyldCmBgYAoKIyMjIE91dHB1dAoKYGBge3J9CndyaXRlX2Nzdih0cmFpbiwgInRyYWluX2NsZWFuLmNzdiIpCmBgYAoKIyMjIEVEQQoKYGBge3IsIG1lc3NhZ2UgPSBGQUxTRSwgd2FybmluZyA9IEZBTFNFfQpnZ3Bsb3QodHJhaW4sIGFlcyh4ID0gZGF0ZSwgeSA9IG5wb2RzKSkgKyAKICBnZW9tX3BvaW50KGFscGhhID0gMC4yNSkgKyAKICBnZW9tX3Ntb290aChzZSA9IEZBTFNFKSArIAogIHNjYWxlX3lfY29udGludW91cyhsaW1pdHMgPSBjKDAsMTApKSArIAogIHNjYWxlX3hfZGF0ZXRpbWUoZGF0ZV9icmVha3MgPSAiMSBtb250aCIsIGRhdGVfbGFiZWxzID0gIiViIikgKyAKICBsYWJzKHRpdGxlID0gIk51bWJlciBvZiBQZWxpY2FuIFBvZHM6IDIwMTciLAogICAgICAgeCA9ICJEYXRlIiwKICAgICAgIHkgPSAiTnVtYmVyIG9mIFBvZHMiKQpgYGAKCmBgYHtyfQoKYGBgCgojIyMgVmlldyBQcmV0dHkgUGhvdG9zCgpgYGB7cn0KcHJldHR5ID0gc3Vic2V0KHRyYWluLCB0cmFpbiRwcmV0dHkgPT0gVFJVRSkK
cGltZyA9IHJlcChOQSwgbnJvdyhwcmV0dHkpKQpmb3IgKGkgaW4gMTpucm93KHByZXR0eSkpIHsKICBwaW1nW2ldID0gc3RyaV9yZXBsYWNlX2FsbF9maXhlZChwYXN0ZSgiUGVsaVBob3RvczFGb2xkZXIvIiwgcHJldHR5JGZpbGVbaV0sIGNvbGxhcHNlID0gIiIpLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBwYXR0ZXJuID0gIiAiLCByZXBsYWNlbWVudCA9ICIiKQp9CgppbWxpc3QgPSBpbWxpc3QobG9hZC5pbWFnZShwaW1nWzFdKSwgbG9hZC5pbWFnZShwaW1nWzJdKSwgbG9hZC5pbWFnZShwaW1nWzNdKSwgbG9hZC5pbWFnZShwaW1nWzRdKSwKICAgICAgICAgICAgICAgIGxvYWQuaW1hZ2UocGltZ1s1XSksIGxvYWQuaW1hZ2UocGltZ1s2XSksIGxvYWQuaW1hZ2UocGltZ1s3XSksIGxvYWQuaW1hZ2UocGltZ1s4XSksCiAgICAgICAgICAgICAgICBsb2FkLmltYWdlKHBpbWdbOV0pLCBsb2FkLmltYWdlKHBpbWdbMTBdKSwgbG9hZC5pbWFnZShwaW1nWzExXSksIGxvYWQuaW1hZ2UocGltZ1sxMl0pLAogICAgICAgICAgICAgICAgbG9hZC5pbWFnZShwaW1nWzEzXSksIGxvYWQuaW1hZ2UocGltZ1sxNF0pKQojaW1saXN0ID0gYXMuZGF0YS5mcmFtZShpbWxpc3QpCmZvciAoaSBpbiAxOjE0KSB7CiAgcGxvdChpbWxpc3RbaV0pCn0KYGBgCgojIyMgV2VhdGhlcgoKYGBge3J9CmdncGxvdCh0cmFpbiwgYWVzKHggPSBkYXRlLCB5ID0gd2VhdGhlcikpICsgZ2VvbV9wb2ludCgpCmBgYAoKIyMjIFNtb2tlCgpgYGB7cn0Kc21va2UgPSBzdWJzZXQodHJhaW4sIHRyYWluJHNtb2tlID09IFRSVUUpCnNpbWcgPSByZXAoTkEsIG5yb3coc21va2UpKQpmb3IgKGkgaW4gMTpucm93KHNtb2tlKSkgewogIHNpbWdbaV0gPSBzdHJpX3JlcGxhY2VfYWxsX2ZpeGVkKHBhc3RlKCJQZWxpUGhvdG9zMUZvbGRlci8iLCBzbW9rZSRmaWxlW2ldLCBjb2xsYXBzZSA9ICIiKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcGF0dGVybiA9ICIgIiwgcmVwbGFjZW1lbnQgPSAiIikKfQoKaW1saXN0ID0gaW1saXN0KGxvYWQuaW1hZ2Uoc2ltZ1sxXSksIGxvYWQuaW1hZ2Uoc2ltZ1syXSkpCiNpbWxpc3QgPSBhcy5kYXRhLmZyYW1lKGltbGlzdCkKZm9yIChpIGluIDE6MikgewogIHBsb3QoaW1saXN0W2ldKQp9CmBgYAoKIyMjIERpc3R1cmJhbmNlCgpgYGB7cn0KZGYgPSBuYS5vbWl0KHRyYWluW3RyYWluJGRpc3R1cmJhbmNlID09IFRSVUUsYygiZGF0ZSIsICJkaXN0dXJiYW5jZSIpXSkKZ2dwbG90KGRmLCBhZXMoeCA9IGRhdGUsIHkgPSBkaXN0dXJiYW5jZSkpICsgCiAgZ2VvbV9wb2ludChhbHBoYSA9IDAuNSkKYGBgCgojIyMgTm90ZXMKCmBhYmFuZG9ubWVudGAsIGBzdGFnZW9mbmVzdGluZ2AgYW5kIGBwcmVkYXRvcnNgIGFyZSBhbGwgZmFsc2Ugb3IgTkEgYW5kIGFyZSwgdGhlcmVmb3JlLCB1c2VsZXNzIHRvIHVzLiAKCgoKCgoKCgoK